# 声明：本代码仅供学习和研究目的使用。使用者应遵守以下原则：
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率，避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。


# Basic settings
PLATFORM = "dy"
# Search keywords, separated by ASCII commas
KEYWORDS = "编程副业,编程兼职"
# Login method: qrcode, phone or cookie
LOGIN_TYPE = "qrcode"
COOKIES = ""
# For valid values see the enums under media_platform.xxx.field; currently only supported for Xiaohongshu
SORT_TYPE = "popularity_descending"
# For valid values see the enums under media_platform.xxx.field; currently only supported for Douyin
PUBLISH_TIME_TYPE = 0
# Crawl mode: search (keyword search) | detail (post detail) | creator (creator home page data)
CRAWLER_TYPE = "search"
# Custom User-Agent (currently only effective for XHS).
# Adjacent literals are concatenated into one string; keep the trailing spaces.
UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/122.0.0.0 Safari/537.36"
)

# Whether to route requests through an IP proxy
ENABLE_IP_PROXY = False

# Maximum crawl interval, in seconds, when the proxy is not enabled
# (currently only effective for XHS)
CRAWLER_MAX_SLEEP_SEC = 2

# Number of IPs kept in the proxy pool
IP_PROXY_POOL_COUNT = 2

# Name of the proxy IP provider
IP_PROXY_PROVIDER_NAME = "kuaidaili"

# True  -> no browser window is opened (headless browser)
# False -> a browser window is opened
# Xiaohongshu: if QR-code login keeps failing, open the browser and solve the
#   slider captcha by hand.
# Douyin: if it keeps reporting failure, open the browser and check whether a
#   phone-number verification appeared after the QR-code login; if so, pass it
#   manually and try again.
HEADLESS = False

# Whether to persist the login state
SAVE_LOGIN_STATE = True

# Where crawled data is stored: csv, db or json.
# db is recommended because it de-duplicates records.
SAVE_DATA_OPTION = "db"

# Directory template for the browser's cached user data;
# %s will be replaced by platform name
USER_DATA_DIR = "browser_data/%s_user_data_dir"

# Page number to start crawling from (defaults to the first page)
START_PAGE = 1

# Upper bound on the number of videos/posts to crawl
CRAWLER_MAX_NOTES_COUNT = 40

# Number of concurrent crawlers
MAX_CONCURRENCY_NUM = 1

# Whether to crawl images as well (off by default)
ENABLE_GET_IMAGES = False

# Whether to crawl comments (on by default)
ENABLE_GET_COMMENTS = True

# Upper bound on first-level comments crawled per video/post
CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = 10

# Whether to crawl second-level (reply) comments (off by default).
# Projects set up with an older version that store data in db must add the
# extra table columns first; see schema/tables.sql line 287.
ENABLE_GET_SUB_COMMENTS = False

# DEPRECATED: list of Xiaohongshu note IDs to crawl.
# DEPRECATED: crawling by bare note ID fails because the xsec_token and
# xsec_source parameters are missing.
# XHS_SPECIFIED_ID_LIST = [
#     "66fad51c000000001b0224b8",
#     # ........................
# ]

# Xiaohongshu note URLs to crawl; each URL currently has to carry the
# xsec_token and xsec_source parameters.
XHS_SPECIFIED_NOTE_URL_LIST = [
    "https://www.xiaohongshu.com/explore/66fad51c000000001b0224b8?xsec_token=AB3rO-QopW5sgrJ41GwN01WCXh6yWPxjSoFI9D5JIMgKw=&xsec_source=pc_search",
    # ........................
]

# Douyin IDs to crawl
DY_SPECIFIED_ID_LIST = [
    "7280854932641664319",
    "7202432992642387233",
    # ........................
]

# Kuaishou IDs to crawl
KS_SPECIFIED_ID_LIST = [
    "3xf8enb8dbj6uig",
    "3x6zz972bchmvqe",
]

# Bilibili video bvids to crawl
BILI_SPECIFIED_ID_LIST = [
    "BV1d54y1g7db",
    "BV1Sz4y1U77N",
    "BV14Q4y1n7jz",
    # ........................
]

# Weibo posts to crawl
WEIBO_SPECIFIED_ID_LIST = [
    "4982041758140155",
    # ........................
]

# Weibo creator IDs
WEIBO_CREATOR_ID_LIST = [
    "7976676547",
]

# Weibo IDs (sec_id) whose follower and following lists should be fetched.
# One ID per line, kept as strings, in the original order.
WEIBO_RELATION_ID_LIST = [
    "3254988854",
    "5044281310",
    "1904328862",
    "7348359537",
    "3279710155",
    "7737164717",
    "2959386434",
    "7796073639",
    "1471416721",
    "1746221281",
    "5572566998",
    "7883130603",
    "2422151692",
    "1788935762",
    "2660807713",
    "6257986605",
    "1234669437",
    "1761731257",
    "1801539801",
    "1934094910",
    "1774314942",
    "1657996683",
    "2072724293",
    "5881975508",
    "1174132502",
    "7788219092",
    "1445403190",
    "3810045758",
    "7785431482",
    "2933491427",
    "1735534331",
    "6221670060",
    "6557588519",
    "5632180436",
    "2017327511",
    "5625609005",
    "5972729846",
    "3554444773",
    "7405898003",
    "1735546403",
    "2657550845",
    "1144393351",
    "7917419483",
    "2707480475",
    "1154258881",
    "5198958232",
    "2045775993",
    "2036784973",
    "1694759962",
    "5575798150",
    "5202133435",
    "6001853980",
    "2861590343",
    "1777670117",
    "7941457255",
    "7754177906",
    "1590065310",
    "7747056179",
    "3909383956",
    "2086220694",
    "5933392166",
    "1275353723",
    "5777632138",
    "5977321619",
    "7751973672",
    "3904126253",
    "5751181769",
    "1645215240",
    "7306637230",
    "1448589414",
    "1574736884",
    "2143622550",
    "5892975692",
    "2664689831",
    "6895880255",
    "5896776628",
    "1961152803",
    "3916029817",
    "2658296723",
    "2233211910",
    "2245969614",
    "2791919213",
    "1736287852",
    "6569076166",
    "7904033518",
    "3832998264",
    "1778375693",
    "5083533502",
    "1582297713",
    "2030864003",
    "7981077977",
    "5626402362",
    "5253090679",
    "5082632063",
    "1618015624",
    "7884736308",
    "6150389364",
    "2696594545",
    "1963106735",
    "6422918103",
    "7496877522",
    "3237522444",
    "1657765690",
    "7569671516",
    "7899068402",
    "5297830810",
    "7188396136",
    "6605420925",
    "5127477345",
    "5973349828",
    "1649173367",
    "5821279480",
    "5994003317",
    "6787937484",
    "6062260921",
    "7745669802",
    "5622841182",
    "1687340260",
    "6268162387",
    "3163693955",
    "3771251624",
    "6670392036",
    "1933646343",
    "1456115034",
    "1645504501",
    "6739163134",
    "5593590237",
    "7322674601",
    "2963774131",
    "6502058142",
    "5996162984",
    "1915217337",
    "6735168701",
    "1769519235",
    "6609612161",
    "7172242362",
    "1772293130",
    "1309138161",
    "6460024647",
    "5876047128",
    "2524620932",
    "1898907123",
    "5594987868",
    "1405562072",
    "2702772241",
    "1706596590",
    "1102380772",
    "7741776528",
    "6443495545",
    "7557332360",
    "6063046645",
    "6762044201",
    "2676140525",
    "5637236373",
    "7463623161",
    "5533390220",
    "7323156393",
    "1095240537",
    "7479392783",
    "7832859160",
    "2802989774",
    "3915015388",
    "6287139922",
    "1994962631",
    "2712644232",
    "6606499450",
    "2400900467",
    "6126578118",
    "5836117168",
    "6875924987",
    "7318123124",
    "6430276906",
    "7898581906",
    "6923905607",
    "6125811604",
    "2659072871",
    "76649",
    "7844233609",
    "6523396419",
    "7892445368",
    "7778000864",
    "6083151801",
    "5822247710",
    "7049146899",
    "7849354435",
    "2433609597",
    "1400145860",
    "7657561864",
    "1672797627",
    "7747386853",
    "7308497267",
    "6864354594",
    "7673582024",
    "7821253748",
    "7895921285",
    "3227474685",
    "6234232383",
    "2559489003",
    "2124395187",
    "7938340264",
    "1769469393",
    "7191735631",
    "6912435519",
    "1298535315",
    "7558002525",
    "6154400000",
    "1678537572",
    "6020311121",
    "5866153310",
    "2091130370",
    "2271315723",
    "7184284792",
    "1705126943",
    "1012502005",
    "2297476432",
    "6462712655",
    "2235121680",
    "2306808654",
    "3557562270",
    "3062916494",
    "6468110495",
    "6475203060",
    "2504179791",
    "3131509067",
    "3900043513",
    "6386051879",
    "6010630451",
    "5271138504",
    "6311526625",
    "7257861776",
    "7746756998",
    "2770907897",
    "6035241974",
    "2671012463",
    "1364923764",
    "5663944416",
    "2060826595",
    "1196596514",
    "7577435405",
    "7838274950",
    "7847853712",
    "1806827651",
    "1776216023",
    "7322735988",
    "7313970640",
    "5873913534",
    "7941962899",
    "5367686790",
    "6223797915",
    "7496466074",
    "7516633331",
    "7689399381",
    "1784534181",
    "6178599595",
    "1766830894",
    "2710221175",
    "1619787425",
    "3061560893",
    "7311792947",
    "2436570427",
    "6896025548",
    "5958388574",
    "6474390440",
    "1816461447",
    "2010378367",
    "7623903681",
    "6156653024",
    "6091345367",
    "5579584073",
    "6429872722",
    "6819620156",
    "5705836856",
    "6732525408",
    "2109513645",
    "6040917361",
    "6072247841",
    "2828204371",
    "1882709442",
    "7784591182",
    "7801592924",
    "7924456059",
    "5536422663",
    "5695068903",
    "2961094933",
    "2638394171",
    "2678725572",
    "5538487605",
    "5884152081",
    "5936821777",
    "2212120781",
    "5640622684",
    "2801210033",
    "7897924634",
    "1650069617",
    "1826225337",
    "6084644994",
    "7960401798",
    "1977092593",
    "1739254867",
    "7733214282",
    "2383467420",
    "1565195075",
    "6247933102",
    "7933110068",
    "7907462344",
    "7905576494",
    "7417572929",
    "7761466638",
    "1768521455",
    "2096598943",
    "6361885943",
    "7709295637",
    "6856705606",
    "6715807371",
    "2815980962",
    "6217127804",
    "7909281199",
    "7840784109",
    "1654971185",
    "6585564208",
    "2763624531",
    "5332407420",
    "2289329243",
    "5657271322",
    "3029439173",
    "3261101232",
    "5762999670",
    "2477165124",
    "6811672042",
    "1900287873",
    "5233817658",
    "7632118102",
    "7432920326",
    "7395317221",
    "5274628707",
    "7783319523",
    "1951265310",
    "7388316221",
    "5774091302",
    "2944665215",
    "3263580352",
    "2166124254",
    "1843057895",
    "7907025724",
    "1299408077",
    "1810600002",
    "7815725248",
    "5857014755",
    "7837219513",
    "5754536449",
    "7913375068",
    "7083000977",
    "5849000493",
    "5581662275",
    "5292492641",
    "7962162736",
    "5878659096",
    "6231508015",
    "7560052386",
    "3967140810",
    "3752212113",
    "7923557047",
    "5324467113",
    "7943096113",
    "1644556265",
    "1483727065",
    "2538671480",
    "7647360354",
    "1650435503",
    "1869647833",
    "5522702239",
    "2081100005",
    "7481614218",
    "7733554711",
    "7846006249",
    "5495292050",
    "1891017245",
    "7721218456",
    "5504043057",
    "5111398092",
    "5529284403",
    "6896060305",
    "5591011978",
    "7476717425",
    "5173239667",
    "2855089844",
    "6329807434",
    "2771816714",
    "5864680286",
    "2660952874",
    "7913338822",
    "1924504435",
    "2608921824",
    "7827595418",
    "6204785563",
    "7981251783",
    "7869027552",
    "6546762147",
    "5855474761",
    "7944627317",
    "1066717223",
    "2791812425",
    "6328926693",
    "1847544665",
    "5519255480",
    "2834287454",
    "2256734103",
    "6020862844",
    "3475046440",
    "5582233022",
    "1701517347",
    "1442765984",
    "7046273934",
    "2030064555",
    "7372388933",
    "7939288454",
    "1622168834",
    "6233541932",
    "6173462130",
    "7463953183",
    "6467575513",
    "1833535585",
    "7897298154",
    "6973564411",
    "3985572533",
    "2667228671",
    "1871515602",
    "5736438989",
    "5596623393",
    "7628664846",
    "7887610857",
    "1743969931",
    "1594264503",
    "7806624858",
    "7813862616",
    "7952824752",
    "7364026406",
    "7978234791",
    "7360874596",
    "1992812411",
    "1371163915",
    "1297035881",
    "3335587582",
    "1791606001",
    "5853930204",
    "1756898981",
    "6416483554",
    "7818690378",
    "5992913938",
    "7624318562",
    "3779057983",
    "1688332107",
    "2359887882",
    "1846009527",
    "1035745131",
    "7723670421",
    "1771175524",
    "2116860432",
    "6355338152",
    "2116114110",
    "2461375583",
    "6320679364",
    "7773562681",
    "7926548648",
    "2599070457",
    "5977241857",
    "3942244676",
    "6450992906",
    "3277089805",
    "2671604972",
    "2340990050",
    "3986791260",
    "7371196958",
    "2571905802",
    "5334937012",
    "5721016930",
    "6313746929",
    "7950477566",
    "6053707640",
    "7441877325",
    "7943861186",
    "7951443422",
    "7913766776",
    "3989617770",
    "7738078384",
    "6876869802",
    "1829453082",
    "3681812032",
    "5928911978",
    "2156751832",
    "3203540865",
    "5045769972",
    "7729461453",
    "7360370223",
    "6864959839",
    "3168633370",
    "7959886368",
    "5344855140",
    "1821761423",
    "6250240563",
    "7937449504",
    "7968937211",
    "7342103011",
    "2743094590",
    "1763997087",
    "5974176293",
    "7074633965",
    "5941823583",
    "6219694218",
    "1445543304",
    "2537271702",
    "6512274260",
    "5769529300",
    "6302022619",
    "5923712878",
    "3555919517",
    "3979291238",
    "6001773928",
    "2600201010",
    "7876755342",
    "1950378184",
    "2097881287",
    "2838428820",
    "6168864835",
    "7885830005",
    "2148522454",
    "7579716475",
    "5515434563",
    "1196105904",
    "6834681135",
    "6378490814",
    "6238497208",
    "7345022636",
    "2140311505",
    "7724554466",
    "7982287549",
    "1074577974",
    "6336153547",
    "7884828504",
    "7827876219",
    "5253488430",
    "3810683249",
    "5596059519",
    "5678078738",
    "7403336048",
    "5702556141",
    "5617702288",
    "7081695396",
    "6578931935",
    "5643812125",
    "1564145902",
    "7243495357",
    "2583326924",
    "5909988678",
    "5075861630",
    "7475927987",
    "5622615137",
    "2654898795",
    "2907736135",
    "1710224707",
    "5865659866",
    "1015409673",
    "1884951592",
    "5906706220",
    "1040961364",
    "7099165441",
    "6183171467",
    "7976676547",
    "3082736394",
    "1237434035",
    "5088483659",
    "2411468974",
    "6324226177",
    "5999853241",
    "6508968033",
    "7745649972",
    "6665102206",
    "1910488870",
    "5104826877",
    "1080241057",
    "7557265043",
    "2694542280",
    "7845618682",
    "2278458312",
    "6625385356",
    "6081463122",
    "6116519048",
    "6329001427",
    "3298651524",
    "6285934437",
    "7922757335",
    "7568134222",
    "7935637408",
    "2107713074",
    "7713402692",
    "6737089726",
    "2919830457",
    "7872694533",
    "6467608517",
    "5748038108",
    "7745161189",
    "6034633476",
    "3083678462",
    "1303721381",
    "7535687408",
    "7828297539",
    "7903127608",
    "6218861255",
    "7513518795",
    "1600920885",
    "7838001457",
    "6852943293",
    "7380538559",
    "7882437403",
    "6581822851",
    "5996202135",
    "7390464854",
    "2649424090",
    "5263425265",
    "7749185279",
    "7930760064",
    "7841109736",
]

# Tieba posts to crawl
TIEBA_SPECIFIED_ID_LIST = []

# Tieba forum names; posts under each listed forum are crawled
TIEBA_NAME_LIST = [
    # "盗墓笔记"
]

# Tieba creator URLs
TIEBA_CREATOR_URL_LIST = [
    "https://tieba.baidu.com/home/main/?id=tb.1.7f139e2e.6CyEwxu3VJruH_-QqpCi6g&fr=frs",
    # ........................
]

# Xiaohongshu creator IDs
XHS_CREATOR_ID_LIST = [
    "63e36c9a000000002703502b",
    # ........................
]

# Douyin creator IDs (sec_id)
DY_CREATOR_ID_LIST = [
    "MS4wLjABAAAAbJOVGQULdY8QarP9RFMXWnpcVv0SAp4rlDEaZiz9slxFq1Z1cpJqo-zXLFvZtfVo",
    # ........................
]

# Douyin accounts whose follower and following lists should be fetched.
# Each entry carries both a sec_user_id and a user_id.
DY_RELATION_ID_LIST = [
    {
        "sec_user_id": "MS4wLjABAAAAXQeDCj8vAMC6dSMl3zf5oh5AXDuHCmmQqKA19Tg8CTdkQyTtGdn-PEqRLgVRuI0Q",
        "user_id": "3368537682883892",
    },
    {
        "sec_user_id": "MS4wLjABAAAAbJOVGQULdY8QarP9RFMXWnpcVv0SAp4rlDEaZiz9slxFq1Z1cpJqo-zXLFvZtfVo",
        "user_id": "3844373616272056",
    },
]

# Bilibili creator IDs (sec_id)
BILI_CREATOR_ID_LIST = [
    "20813884",
    # ........................
]

# Kuaishou creator IDs
KS_CREATOR_ID_LIST = [
    "3x4sm73aye7jq7i",
    # ........................
]

# Zhihu creator home page URLs
ZHIHU_CREATOR_URL_LIST = [
    "https://www.zhihu.com/people/yd1234567",
    # ........................
]

# Zhihu posts to crawl (the entries below are full URLs)
ZHIHU_SPECIFIED_ID_LIST = [
    # answer
    "https://www.zhihu.com/question/826896610/answer/4885821440",
    # article
    "https://zhuanlan.zhihu.com/p/673461588",
    # video
    "https://www.zhihu.com/zvideo/1539542068422144000",
]

# Word cloud settings
# Whether to generate a word cloud image from crawled comments
ENABLE_GET_WORDCLOUD = False
# Custom words and the group each one belongs to.
# Rule: "xx": "yy", where xx is the custom word to add and yy is the name of
# the group that xx is assigned to.
CUSTOM_WORDS = {
    # Treat "零几" as a single token
    "零几": "年份",
    # Example custom word
    "高频词": "专业术语",
}

# Path to the stop-word (excluded words) file
STOP_WORDS_FILE = "./docs/hit_stopwords.txt"

# Path to the Chinese font file
FONT_PATH = "./docs/STZHONGS.TTF"

# First day of the crawl window, YYYY-MM-DD. Only supported for bilibili keyword
# search. If None, no time range is set and the default keyword search applies,
# which returns at most 1000 videos.
START_DAY = "2024-01-01"

# Last day of the crawl window, YYYY-MM-DD. Only supported for bilibili keyword
# search. If None, no time range is set and the default keyword search applies,
# which returns at most 1000 videos.
END_DAY = "2024-01-01"

# Whether to crawl day by day. Only supported for bilibili keyword search.
# False: START_DAY and END_DAY are ignored.
# True: results are filtered one day at a time from START_DAY to END_DAY, which
#   gets past the 1000-video limit and collects as many videos for the keyword
#   as possible.
ALL_DAY = False